package resolver.analyzer

import java.util.regex.Pattern
import org.apache.log4j.Logger

/**
 * Analyzer that collects every match of a regular expression from the
 * lines of a String, File or InputStream.
 *
 * User: wohlgemuth
 * Date: Mar 4, 2010
 * Time: 10:42:18 PM
 */
class RegularExpressionAnalyzer implements Analyzer {

  /**
   * internal logger, shared by all instances and registered as "analyzer"
   */
  private static final Logger logger = Logger.getLogger("analyzer")

  /**
   * The expression every line is matched against. The initial value matches
   * everything, but both constructors of this class replace it with the
   * pattern they were given.
   */
  def pattern = /.*/

  /**
   * Replaces the pattern used by {@link #analyze(Object)}.
   *
   * @param pattern the compiled expression to match lines against
   */
  def setPattern(Pattern pattern) {
    this.pattern = pattern
  }

  /**
   * Creates an analyzer from a regular expression given as text.
   *
   * @param pattern the expression source; it is compiled with
   *                {@link Pattern#compile(String)}
   */
  public RegularExpressionAnalyzer(String pattern) {
    this.setPattern(Pattern.compile(pattern))
  }

  /**
   * Creates an analyzer from an already compiled expression.
   *
   * @param pattern the compiled expression to match lines against
   */
  public RegularExpressionAnalyzer(Pattern pattern) {
    this.setPattern(pattern)
  }

  /**
   * Reads the value line by line and collects every match of the pattern.
   *
   * @param value the text to search; must be a String, an InputStream or a File
   * @return the distinct matches found over all lines; empty if nothing matched
   * @throws InternalError if the value is of any other type (including null)
   */
  Set<String> analyze(def value) {
    boolean supported = value instanceof String || value instanceof InputStream || value instanceof File

    if (!supported) {
      // NOTE(review): IllegalArgumentException would be the conventional type here;
      // InternalError is kept so that existing callers catching it keep working.
      throw new InternalError("please provide an argument which is a string, file or inputstream")
    }

    Set<String> result = new HashSet<String>()

    value.eachLine { String text ->
      (text =~ pattern).each { def word ->
        // a match may arrive as a collection instead of a plain value (presumably
        // when the pattern has capture groups - see the Groovy Matcher docs);
        // only its first element is kept
        result.add(word instanceof Collection ? word.first().toString() : word.toString())
      }
    }

    return result
  }

  /**
   * Cleans up a single word and adds it to the result set. Empty words and
   * words consisting only of digits are skipped. If the word ends with ")"
   * and contains more closing than opening brackets, one trailing ")" is
   * removed before the word is added.
   *
   * @param w the word to clean up
   * @param result the set receiving the cleaned word
   * @return the outcome of {@code result.add} when the word was added,
   *         otherwise null
   */
  protected def cleanupMatches(def w, Set<String> result) {
    //only want to process words longer than 0
    if (w.size() <= 0) {
      return null
    }

    //drop one surplus closing bracket at the end of the word
    if (w.endsWith(")") && w.count(")") > w.count("(")) {
      w = w.substring(0, w.size() - 1)
    }

    //pure numbers are not kept
    if (w.matches(/\d+/)) {
      return null
    }

    return result.add(w)
  }

}
